1   package org.apache.lucene.search.postingshighlight;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import org.apache.lucene.analysis.Analyzer;
21  import org.apache.lucene.analysis.MockAnalyzer;
22  import org.apache.lucene.analysis.MockTokenizer;
23  import org.apache.lucene.document.Document;
24  import org.apache.lucene.document.Field;
25  import org.apache.lucene.document.FieldType;
26  import org.apache.lucene.document.StoredField;
27  import org.apache.lucene.document.StringField;
28  import org.apache.lucene.document.TextField;
29  import org.apache.lucene.index.DirectoryReader;
30  import org.apache.lucene.index.IndexOptions;
31  import org.apache.lucene.index.IndexReader;
32  import org.apache.lucene.index.IndexWriterConfig;
33  import org.apache.lucene.index.RandomIndexWriter;
34  import org.apache.lucene.index.Term;
35  import org.apache.lucene.queries.CustomScoreQuery;
36  import org.apache.lucene.search.BooleanClause;
37  import org.apache.lucene.search.BooleanQuery;
38  import org.apache.lucene.search.IndexSearcher;
39  import org.apache.lucene.search.MatchAllDocsQuery;
40  import org.apache.lucene.search.PhraseQuery;
41  import org.apache.lucene.search.Query;
42  import org.apache.lucene.search.ScoreDoc;
43  import org.apache.lucene.search.Sort;
44  import org.apache.lucene.search.TermQuery;
45  import org.apache.lucene.search.TopDocs;
46  import org.apache.lucene.store.Directory;
47  import org.apache.lucene.util.LuceneTestCase;
48  
49  import java.io.BufferedReader;
50  import java.io.IOException;
51  import java.io.InputStreamReader;
52  import java.nio.charset.StandardCharsets;
53  import java.text.BreakIterator;
54  import java.util.Arrays;
55  import java.util.Map;
56  
57  public class TestPostingsHighlighter extends LuceneTestCase {
58    
59    public void testBasics() throws Exception {
60      Directory dir = newDirectory();
61      IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
62      iwc.setMergePolicy(newLogMergePolicy());
63      RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
64      
65      FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
66      offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
67      Field body = new Field("body", "", offsetsType);
68      Document doc = new Document();
69      doc.add(body);
70      
71      body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
72      iw.addDocument(doc);
73      body.setStringValue("Highlighting the first term. Hope it works.");
74      iw.addDocument(doc);
75      
76      IndexReader ir = iw.getReader();
77      iw.close();
78      
79      IndexSearcher searcher = newSearcher(ir);
80      PostingsHighlighter highlighter = new PostingsHighlighter();
81      Query query = new TermQuery(new Term("body", "highlighting"));
82      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
83      assertEquals(2, topDocs.totalHits);
84      String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
85      assertEquals(2, snippets.length);
86      assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
87      assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
88      
89      ir.close();
90      dir.close();
91    }
92  
93    public void testFormatWithMatchExceedingContentLength2() throws Exception {
94      
95      String bodyText = "123 TEST 01234 TEST";
96  
97      String[] snippets = formatWithMatchExceedingContentLength(bodyText);
98      
99      assertEquals(1, snippets.length);
100     assertEquals("123 <b>TEST</b> 01234 TE", snippets[0]);
101   }
102 
103   public void testFormatWithMatchExceedingContentLength3() throws Exception {
104     
105     String bodyText = "123 5678 01234 TEST TEST";
106     
107     String[] snippets = formatWithMatchExceedingContentLength(bodyText);
108     
109     assertEquals(1, snippets.length);
110     assertEquals("123 5678 01234 TE", snippets[0]);
111   }
112   
113   public void testFormatWithMatchExceedingContentLength() throws Exception {
114     
115     String bodyText = "123 5678 01234 TEST";
116     
117     String[] snippets = formatWithMatchExceedingContentLength(bodyText);
118     
119     assertEquals(1, snippets.length);
120     // LUCENE-5166: no snippet
121     assertEquals("123 5678 01234 TE", snippets[0]);
122   }
123 
124   private String[] formatWithMatchExceedingContentLength(String bodyText) throws IOException {
125     
126     int maxLength = 17;
127     
128     final Analyzer analyzer = new MockAnalyzer(random());
129     
130     Directory dir = newDirectory();
131     IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
132     iwc.setMergePolicy(newLogMergePolicy());
133     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
134     
135     final FieldType fieldType = new FieldType(TextField.TYPE_STORED);
136     fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
137     final Field body = new Field("body", bodyText, fieldType);
138     
139     Document doc = new Document();
140     doc.add(body);
141     
142     iw.addDocument(doc);
143     
144     IndexReader ir = iw.getReader();
145     iw.close();
146     
147     IndexSearcher searcher = newSearcher(ir);
148     
149     Query query = new TermQuery(new Term("body", "test"));
150     
151     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
152     assertEquals(1, topDocs.totalHits);
153     
154     PostingsHighlighter highlighter = new PostingsHighlighter(maxLength);
155     String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
156     
157     
158     ir.close();
159     dir.close();
160     return snippets;
161   }
162   
163   // simple test highlighting last word.
164   public void testHighlightLastWord() throws Exception {
165     Directory dir = newDirectory();
166     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
167     iwc.setMergePolicy(newLogMergePolicy());
168     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
169     
170     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
171     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
172     Field body = new Field("body", "", offsetsType);
173     Document doc = new Document();
174     doc.add(body);
175     
176     body.setStringValue("This is a test");
177     iw.addDocument(doc);
178     
179     IndexReader ir = iw.getReader();
180     iw.close();
181     
182     IndexSearcher searcher = newSearcher(ir);
183     PostingsHighlighter highlighter = new PostingsHighlighter();
184     Query query = new TermQuery(new Term("body", "test"));
185     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
186     assertEquals(1, topDocs.totalHits);
187     String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
188     assertEquals(1, snippets.length);
189     assertEquals("This is a <b>test</b>", snippets[0]);
190     
191     ir.close();
192     dir.close();
193   }
194   
195   // simple test with one sentence documents.
196   public void testOneSentence() throws Exception {
197     Directory dir = newDirectory();
198     // use simpleanalyzer for more natural tokenization (else "test." is a token)
199     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
200     iwc.setMergePolicy(newLogMergePolicy());
201     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
202     
203     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
204     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
205     Field body = new Field("body", "", offsetsType);
206     Document doc = new Document();
207     doc.add(body);
208     
209     body.setStringValue("This is a test.");
210     iw.addDocument(doc);
211     body.setStringValue("Test a one sentence document.");
212     iw.addDocument(doc);
213     
214     IndexReader ir = iw.getReader();
215     iw.close();
216     
217     IndexSearcher searcher = newSearcher(ir);
218     PostingsHighlighter highlighter = new PostingsHighlighter();
219     Query query = new TermQuery(new Term("body", "test"));
220     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
221     assertEquals(2, topDocs.totalHits);
222     String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
223     assertEquals(2, snippets.length);
224     assertEquals("This is a <b>test</b>.", snippets[0]);
225     assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
226     
227     ir.close();
228     dir.close();
229   }
230   
231   // simple test with multiple values that make a result longer than maxLength.
232   public void testMaxLengthWithMultivalue() throws Exception {
233     Directory dir = newDirectory();
234     // use simpleanalyzer for more natural tokenization (else "test." is a token)
235     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
236     iwc.setMergePolicy(newLogMergePolicy());
237     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
238     
239     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
240     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
241     Document doc = new Document();
242     
243     for(int i = 0; i < 3 ; i++) {
244       Field body = new Field("body", "", offsetsType);
245       body.setStringValue("This is a multivalued field");
246       doc.add(body);
247     }
248     
249     iw.addDocument(doc);
250     
251     IndexReader ir = iw.getReader();
252     iw.close();
253     
254     IndexSearcher searcher = newSearcher(ir);
255     PostingsHighlighter highlighter = new PostingsHighlighter(40);
256     Query query = new TermQuery(new Term("body", "field"));
257     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
258     assertEquals(1, topDocs.totalHits);
259     String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
260     assertEquals(1, snippets.length);
261     assertTrue("Snippet should have maximum 40 characters plus the pre and post tags",
262         snippets[0].length() == (40 + "<b></b>".length()));
263     
264     ir.close();
265     dir.close();
266   }
267   
268   public void testMultipleFields() throws Exception {
269     Directory dir = newDirectory();
270     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
271     iwc.setMergePolicy(newLogMergePolicy());
272     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
273     
274     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
275     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
276     Field body = new Field("body", "", offsetsType);
277     Field title = new Field("title", "", offsetsType);
278     Document doc = new Document();
279     doc.add(body);
280     doc.add(title);
281     
282     body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
283     title.setStringValue("I am hoping for the best.");
284     iw.addDocument(doc);
285     body.setStringValue("Highlighting the first term. Hope it works.");
286     title.setStringValue("But best may not be good enough.");
287     iw.addDocument(doc);
288     
289     IndexReader ir = iw.getReader();
290     iw.close();
291     
292     IndexSearcher searcher = newSearcher(ir);
293     PostingsHighlighter highlighter = new PostingsHighlighter();
294     BooleanQuery.Builder query = new BooleanQuery.Builder();
295     query.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
296     query.add(new TermQuery(new Term("title", "best")), BooleanClause.Occur.SHOULD);
297     TopDocs topDocs = searcher.search(query.build(), 10, Sort.INDEXORDER);
298     assertEquals(2, topDocs.totalHits);
299     Map<String,String[]> snippets = highlighter.highlightFields(new String [] { "body", "title" }, query.build(), searcher, topDocs);
300     assertEquals(2, snippets.size());
301     assertEquals("Just a test <b>highlighting</b> from postings. ", snippets.get("body")[0]);
302     assertEquals("<b>Highlighting</b> the first term. ", snippets.get("body")[1]);
303     assertEquals("I am hoping for the <b>best</b>.", snippets.get("title")[0]);
304     assertEquals("But <b>best</b> may not be good enough.", snippets.get("title")[1]);
305     ir.close();
306     dir.close();
307   }
308   
309   public void testMultipleTerms() throws Exception {
310     Directory dir = newDirectory();
311     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
312     iwc.setMergePolicy(newLogMergePolicy());
313     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
314     
315     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
316     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
317     Field body = new Field("body", "", offsetsType);
318     Document doc = new Document();
319     doc.add(body);
320     
321     body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
322     iw.addDocument(doc);
323     body.setStringValue("Highlighting the first term. Hope it works.");
324     iw.addDocument(doc);
325     
326     IndexReader ir = iw.getReader();
327     iw.close();
328     
329     IndexSearcher searcher = newSearcher(ir);
330     PostingsHighlighter highlighter = new PostingsHighlighter();
331     BooleanQuery.Builder query = new BooleanQuery.Builder();
332     query.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
333     query.add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD);
334     query.add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD);
335     TopDocs topDocs = searcher.search(query.build(), 10, Sort.INDEXORDER);
336     assertEquals(2, topDocs.totalHits);
337     String snippets[] = highlighter.highlight("body", query.build(), searcher, topDocs);
338     assertEquals(2, snippets.length);
339     assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
340     assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
341     
342     ir.close();
343     dir.close();
344   }
345   
346   public void testMultiplePassages() throws Exception {
347     Directory dir = newDirectory();
348     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
349     iwc.setMergePolicy(newLogMergePolicy());
350     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
351     
352     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
353     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
354     Field body = new Field("body", "", offsetsType);
355     Document doc = new Document();
356     doc.add(body);
357     
358     body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
359     iw.addDocument(doc);
360     body.setStringValue("This test is another test. Not a good sentence. Test test test test.");
361     iw.addDocument(doc);
362     
363     IndexReader ir = iw.getReader();
364     iw.close();
365     
366     IndexSearcher searcher = newSearcher(ir);
367     PostingsHighlighter highlighter = new PostingsHighlighter();
368     Query query = new TermQuery(new Term("body", "test"));
369     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
370     assertEquals(2, topDocs.totalHits);
371     String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
372     assertEquals(2, snippets.length);
373     assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", snippets[0]);
374     assertEquals("This <b>test</b> is another <b>test</b>. ... <b>Test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[1]);
375     
376     ir.close();
377     dir.close();
378   }
379 
380   public void testUserFailedToIndexOffsets() throws Exception {
381     Directory dir = newDirectory();
382     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
383     iwc.setMergePolicy(newLogMergePolicy());
384     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
385     
386     FieldType positionsType = new FieldType(TextField.TYPE_STORED);
387     positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
388     Field body = new Field("body", "", positionsType);
389     Field title = new StringField("title", "", Field.Store.YES);
390     Document doc = new Document();
391     doc.add(body);
392     doc.add(title);
393     
394     body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
395     title.setStringValue("test");
396     iw.addDocument(doc);
397     body.setStringValue("This test is another test. Not a good sentence. Test test test test.");
398     title.setStringValue("test");
399     iw.addDocument(doc);
400     
401     IndexReader ir = iw.getReader();
402     iw.close();
403     
404     IndexSearcher searcher = newSearcher(ir);
405     PostingsHighlighter highlighter = new PostingsHighlighter();
406     Query query = new TermQuery(new Term("body", "test"));
407     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
408     assertEquals(2, topDocs.totalHits);
409     try {
410       highlighter.highlight("body", query, searcher, topDocs, 2);
411       fail("did not hit expected exception");
412     } catch (IllegalArgumentException iae) {
413       // expected
414     }
415     
416     try {
417       highlighter.highlight("title", new TermQuery(new Term("title", "test")), searcher, topDocs, 2);
418       fail("did not hit expected exception");
419     } catch (IllegalArgumentException iae) {
420       // expected
421     }
422     ir.close();
423     dir.close();
424   }
425   
426   public void testBuddhism() throws Exception {
427     String text = "This eight-volume set brings together seminal papers in Buddhist studies from a vast " +
428                   "range of academic disciplines published over the last forty years. With a new introduction " + 
429                   "by the editor, this collection is a unique and unrivalled research resource for both " + 
430                   "student and scholar. Coverage includes: - Buddhist origins; early history of Buddhism in " + 
431                   "South and Southeast Asia - early Buddhist Schools and Doctrinal History; Theravada Doctrine " + 
432                   "- the Origins and nature of Mahayana Buddhism; some Mahayana religious topics - Abhidharma " + 
433                   "and Madhyamaka - Yogacara, the Epistemological tradition, and Tathagatagarbha - Tantric " + 
434                   "Buddhism (Including China and Japan); Buddhism in Nepal and Tibet - Buddhism in South and " + 
435                   "Southeast Asia, and - Buddhism in China, East Asia, and Japan.";
436     Directory dir = newDirectory();
437     Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
438     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
439     
440     FieldType positionsType = new FieldType(TextField.TYPE_STORED);
441     positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
442     Field body = new Field("body", text, positionsType);
443     Document document = new Document();
444     document.add(body);
445     iw.addDocument(document);
446     IndexReader ir = iw.getReader();
447     iw.close();
448     IndexSearcher searcher = newSearcher(ir);
449     PhraseQuery query = new PhraseQuery("body", "buddhist", "origins");
450     TopDocs topDocs = searcher.search(query, 10);
451     assertEquals(1, topDocs.totalHits);
452     PostingsHighlighter highlighter = new PostingsHighlighter();
453     String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
454     assertEquals(1, snippets.length);
455     assertTrue(snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
456     ir.close();
457     dir.close();
458   }
459   
460   public void testCuriousGeorge() throws Exception {
461     String text = "It’s the formula for success for preschoolers—Curious George and fire trucks! " + 
462                   "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " +
463                   "popular primate and painted in the original watercolor and charcoal style. " + 
464                   "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
465     Directory dir = newDirectory();
466     Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
467     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
468     FieldType positionsType = new FieldType(TextField.TYPE_STORED);
469     positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
470     Field body = new Field("body", text, positionsType);
471     Document document = new Document();
472     document.add(body);
473     iw.addDocument(document);
474     IndexReader ir = iw.getReader();
475     iw.close();
476     IndexSearcher searcher = newSearcher(ir);
477     PhraseQuery query = new PhraseQuery("body", "curious", "george");
478     TopDocs topDocs = searcher.search(query, 10);
479     assertEquals(1, topDocs.totalHits);
480     PostingsHighlighter highlighter = new PostingsHighlighter();
481     String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
482     assertEquals(1, snippets.length);
483     assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
484     ir.close();
485     dir.close();
486   }
487 
488   public void testCambridgeMA() throws Exception {
489     BufferedReader r = new BufferedReader(new InputStreamReader(
490                      this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8));
491     String text = r.readLine();
492     r.close();
493     Directory dir = newDirectory();
494     Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
495     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
496     FieldType positionsType = new FieldType(TextField.TYPE_STORED);
497     positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
498     Field body = new Field("body", text, positionsType);
499     Document document = new Document();
500     document.add(body);
501     iw.addDocument(document);
502     IndexReader ir = iw.getReader();
503     iw.close();
504     IndexSearcher searcher = newSearcher(ir);
505     BooleanQuery.Builder query = new BooleanQuery.Builder();
506     query.add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD);
507     query.add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD);
508     query.add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD);
509     TopDocs topDocs = searcher.search(query.build(), 10);
510     assertEquals(1, topDocs.totalHits);
511     PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
512     String snippets[] = highlighter.highlight("body", query.build(), searcher, topDocs, 2);
513     assertEquals(1, snippets.length);
514     assertTrue(snippets[0].contains("<b>Square</b>"));
515     assertTrue(snippets[0].contains("<b>Porter</b>"));
516     ir.close();
517     dir.close();
518   }
519   
520   public void testPassageRanking() throws Exception {
521     Directory dir = newDirectory();
522     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
523     iwc.setMergePolicy(newLogMergePolicy());
524     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
525     
526     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
527     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
528     Field body = new Field("body", "", offsetsType);
529     Document doc = new Document();
530     doc.add(body);
531     
532     body.setStringValue("This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.");
533     iw.addDocument(doc);
534     
535     IndexReader ir = iw.getReader();
536     iw.close();
537     
538     IndexSearcher searcher = newSearcher(ir);
539     PostingsHighlighter highlighter = new PostingsHighlighter();
540     Query query = new TermQuery(new Term("body", "test"));
541     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
542     assertEquals(1, topDocs.totalHits);
543     String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
544     assertEquals(1, snippets.length);
545     assertEquals("This is a <b>test</b>.  ... Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
546     
547     ir.close();
548     dir.close();
549   }
550 
551   public void testBooleanMustNot() throws Exception {
552     Directory dir = newDirectory();
553     Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
554     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
555     FieldType positionsType = new FieldType(TextField.TYPE_STORED);
556     positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
557     Field body = new Field("body", "This sentence has both terms.  This sentence has only terms.", positionsType);
558     Document document = new Document();
559     document.add(body);
560     iw.addDocument(document);
561     IndexReader ir = iw.getReader();
562     iw.close();
563     IndexSearcher searcher = newSearcher(ir);
564     BooleanQuery.Builder query = new BooleanQuery.Builder();
565     query.add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD);
566     BooleanQuery.Builder query2 = new BooleanQuery.Builder();
567     query.add(query2.build(), BooleanClause.Occur.SHOULD);
568     query2.add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT);
569     TopDocs topDocs = searcher.search(query.build(), 10);
570     assertEquals(1, topDocs.totalHits);
571     PostingsHighlighter highlighter = new PostingsHighlighter(Integer.MAX_VALUE-1);
572     String snippets[] = highlighter.highlight("body", query.build(), searcher, topDocs, 2);
573     assertEquals(1, snippets.length);
574     assertFalse(snippets[0].contains("<b>both</b>"));
575     ir.close();
576     dir.close();
577   }
578 
579   public void testHighlightAllText() throws Exception {
580     Directory dir = newDirectory();
581     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
582     iwc.setMergePolicy(newLogMergePolicy());
583     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
584     
585     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
586     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
587     Field body = new Field("body", "", offsetsType);
588     Document doc = new Document();
589     doc.add(body);
590     
591     body.setStringValue("This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.");
592     iw.addDocument(doc);
593     
594     IndexReader ir = iw.getReader();
595     iw.close();
596     
597     IndexSearcher searcher = newSearcher(ir);
598     PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
599       @Override
600       protected BreakIterator getBreakIterator(String field) {
601         return new WholeBreakIterator();
602       }
603     };
604     Query query = new TermQuery(new Term("body", "test"));
605     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
606     assertEquals(1, topDocs.totalHits);
607     String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
608     assertEquals(1, snippets.length);
609     assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
610     
611     ir.close();
612     dir.close();
613   }
614 
615   public void testSpecificDocIDs() throws Exception {
616     Directory dir = newDirectory();
617     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
618     iwc.setMergePolicy(newLogMergePolicy());
619     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
620     
621     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
622     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
623     Field body = new Field("body", "", offsetsType);
624     Document doc = new Document();
625     doc.add(body);
626     
627     body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
628     iw.addDocument(doc);
629     body.setStringValue("Highlighting the first term. Hope it works.");
630     iw.addDocument(doc);
631     
632     IndexReader ir = iw.getReader();
633     iw.close();
634     
635     IndexSearcher searcher = newSearcher(ir);
636     PostingsHighlighter highlighter = new PostingsHighlighter();
637     Query query = new TermQuery(new Term("body", "highlighting"));
638     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
639     assertEquals(2, topDocs.totalHits);
640     ScoreDoc[] hits = topDocs.scoreDocs;
641     int[] docIDs = new int[2];
642     docIDs[0] = hits[0].doc;
643     docIDs[1] = hits[1].doc;
644     String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 1 }).get("body");
645     assertEquals(2, snippets.length);
646     assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
647     assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
648     
649     ir.close();
650     dir.close();
651   }
652 
653   public void testCustomFieldValueSource() throws Exception {
654     Directory dir = newDirectory();
655     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
656     iwc.setMergePolicy(newLogMergePolicy());
657     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
658     
659     Document doc = new Document();
660 
661     FieldType offsetsType = new FieldType(TextField.TYPE_NOT_STORED);
662     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
663     final String text = "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.";
664     Field body = new Field("body", text, offsetsType);
665     doc.add(body);
666     iw.addDocument(doc);
667     
668     IndexReader ir = iw.getReader();
669     iw.close();
670     
671     IndexSearcher searcher = newSearcher(ir);
672 
673     PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
674         @Override
675         protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
676           assert fields.length == 1;
677           assert docids.length == 1;
678           String[][] contents = new String[1][1];
679           contents[0][0] = text;
680           return contents;
681         }
682 
683         @Override
684         protected BreakIterator getBreakIterator(String field) {
685           return new WholeBreakIterator();
686         }
687       };
688 
689     Query query = new TermQuery(new Term("body", "test"));
690     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
691     assertEquals(1, topDocs.totalHits);
692     String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
693     assertEquals(1, snippets.length);
694     assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
695     
696     ir.close();
697     dir.close();
698   }
699 
700   /** Make sure highlighter returns first N sentences if
701    *  there were no hits. */
702   public void testEmptyHighlights() throws Exception {
703     Directory dir = newDirectory();
704     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
705     iwc.setMergePolicy(newLogMergePolicy());
706     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
707     
708     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
709     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
710     Document doc = new Document();
711 
712     Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", offsetsType);
713     doc.add(body);
714     iw.addDocument(doc);
715     
716     IndexReader ir = iw.getReader();
717     iw.close();
718     
719     IndexSearcher searcher = newSearcher(ir);
720     PostingsHighlighter highlighter = new PostingsHighlighter();
721     Query query = new TermQuery(new Term("body", "highlighting"));
722     int[] docIDs = new int[] {0};
723     String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
724     assertEquals(1, snippets.length);
725     assertEquals("test this is.  another sentence this test has.  ", snippets[0]);
726 
727     ir.close();
728     dir.close();
729   }
730 
731   /** Make sure highlighter we can customize how emtpy
732    *  highlight is returned. */
733   public void testCustomEmptyHighlights() throws Exception {
734     Directory dir = newDirectory();
735     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
736     iwc.setMergePolicy(newLogMergePolicy());
737     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
738     
739     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
740     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
741     Document doc = new Document();
742 
743     Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", offsetsType);
744     doc.add(body);
745     iw.addDocument(doc);
746     
747     IndexReader ir = iw.getReader();
748     iw.close();
749     
750     IndexSearcher searcher = newSearcher(ir);
751     PostingsHighlighter highlighter = new PostingsHighlighter() {
752         @Override
753         public Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
754           return new Passage[0];
755         }
756       };
757     Query query = new TermQuery(new Term("body", "highlighting"));
758     int[] docIDs = new int[] {0};
759     String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
760     assertEquals(1, snippets.length);
761     assertNull(snippets[0]);
762 
763     ir.close();
764     dir.close();
765   }
766 
767   /** Make sure highlighter returns whole text when there
768    *  are no hits and BreakIterator is null. */
769   public void testEmptyHighlightsWhole() throws Exception {
770     Directory dir = newDirectory();
771     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
772     iwc.setMergePolicy(newLogMergePolicy());
773     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
774     
775     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
776     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
777     Document doc = new Document();
778 
779     Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", offsetsType);
780     doc.add(body);
781     iw.addDocument(doc);
782     
783     IndexReader ir = iw.getReader();
784     iw.close();
785     
786     IndexSearcher searcher = newSearcher(ir);
787     PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
788       @Override
789       protected BreakIterator getBreakIterator(String field) {
790         return new WholeBreakIterator();
791       }
792     };
793     Query query = new TermQuery(new Term("body", "highlighting"));
794     int[] docIDs = new int[] {0};
795     String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
796     assertEquals(1, snippets.length);
797     assertEquals("test this is.  another sentence this test has.  far away is that planet.", snippets[0]);
798 
799     ir.close();
800     dir.close();
801   }
802 
803   /** Make sure highlighter is OK with entirely missing
804    *  field. */
805   public void testFieldIsMissing() throws Exception {
806     Directory dir = newDirectory();
807     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
808     iwc.setMergePolicy(newLogMergePolicy());
809     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
810     
811     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
812     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
813     Document doc = new Document();
814 
815     Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", offsetsType);
816     doc.add(body);
817     iw.addDocument(doc);
818     
819     IndexReader ir = iw.getReader();
820     iw.close();
821     
822     IndexSearcher searcher = newSearcher(ir);
823     PostingsHighlighter highlighter = new PostingsHighlighter();
824     Query query = new TermQuery(new Term("bogus", "highlighting"));
825     int[] docIDs = new int[] {0};
826     String snippets[] = highlighter.highlightFields(new String[] {"bogus"}, query, searcher, docIDs, new int[] { 2 }).get("bogus");
827     assertEquals(1, snippets.length);
828     assertNull(snippets[0]);
829 
830     ir.close();
831     dir.close();
832   }
833 
834   public void testFieldIsJustSpace() throws Exception {
835     Directory dir = newDirectory();
836     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
837     iwc.setMergePolicy(newLogMergePolicy());
838     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
839     
840     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
841     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
842 
843     Document doc = new Document();
844     doc.add(new Field("body", "   ", offsetsType));
845     doc.add(new Field("id", "id", offsetsType));
846     iw.addDocument(doc);
847 
848     doc = new Document();
849     doc.add(new Field("body", "something", offsetsType));
850     iw.addDocument(doc);
851     
852     IndexReader ir = iw.getReader();
853     iw.close();
854     
855     IndexSearcher searcher = newSearcher(ir);
856     PostingsHighlighter highlighter = new PostingsHighlighter();
857     int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
858 
859     Query query = new TermQuery(new Term("body", "highlighting"));
860     int[] docIDs = new int[1];
861     docIDs[0] = docID;
862     String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
863     assertEquals(1, snippets.length);
864     assertEquals("   ", snippets[0]);
865 
866     ir.close();
867     dir.close();
868   }
869 
870   public void testFieldIsEmptyString() throws Exception {
871     Directory dir = newDirectory();
872     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
873     iwc.setMergePolicy(newLogMergePolicy());
874     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
875     
876     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
877     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
878 
879     Document doc = new Document();
880     doc.add(new Field("body", "", offsetsType));
881     doc.add(new Field("id", "id", offsetsType));
882     iw.addDocument(doc);
883 
884     doc = new Document();
885     doc.add(new Field("body", "something", offsetsType));
886     iw.addDocument(doc);
887     
888     IndexReader ir = iw.getReader();
889     iw.close();
890     
891     IndexSearcher searcher = newSearcher(ir);
892     PostingsHighlighter highlighter = new PostingsHighlighter();
893     int docID = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
894 
895     Query query = new TermQuery(new Term("body", "highlighting"));
896     int[] docIDs = new int[1];
897     docIDs[0] = docID;
898     String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, new int[] { 2 }).get("body");
899     assertEquals(1, snippets.length);
900     assertNull(snippets[0]);
901 
902     ir.close();
903     dir.close();
904   }
905 
906   public void testMultipleDocs() throws Exception {
907     Directory dir = newDirectory();
908     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
909     iwc.setMergePolicy(newLogMergePolicy());
910     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
911     
912     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
913     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
914 
915     int numDocs = atLeast(100);
916     for(int i=0;i<numDocs;i++) {
917       Document doc = new Document();
918       String content = "the answer is " + i;
919       if ((i & 1) == 0) {
920         content += " some more terms";
921       }
922       doc.add(new Field("body", content, offsetsType));
923       doc.add(newStringField("id", ""+i, Field.Store.YES));
924       iw.addDocument(doc);
925 
926       if (random().nextInt(10) == 2) {
927         iw.commit();
928       }
929     }
930 
931     IndexReader ir = iw.getReader();
932     iw.close();
933     
934     IndexSearcher searcher = newSearcher(ir);
935     PostingsHighlighter highlighter = new PostingsHighlighter();
936     Query query = new TermQuery(new Term("body", "answer"));
937     TopDocs hits = searcher.search(query, numDocs);
938     assertEquals(numDocs, hits.totalHits);
939 
940     String snippets[] = highlighter.highlight("body", query, searcher, hits);
941     assertEquals(numDocs, snippets.length);
942     for(int hit=0;hit<numDocs;hit++) {
943       Document doc = searcher.doc(hits.scoreDocs[hit].doc);
944       int id = Integer.parseInt(doc.get("id"));
945       String expected = "the <b>answer</b> is " + id;
946       if ((id  & 1) == 0) {
947         expected += " some more terms";
948       }
949       assertEquals(expected, snippets[hit]);
950     }
951 
952     ir.close();
953     dir.close();
954   }
955   
956   public void testMultipleSnippetSizes() throws Exception {
957     Directory dir = newDirectory();
958     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
959     iwc.setMergePolicy(newLogMergePolicy());
960     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
961     
962     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
963     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
964     Field body = new Field("body", "", offsetsType);
965     Field title = new Field("title", "", offsetsType);
966     Document doc = new Document();
967     doc.add(body);
968     doc.add(title);
969     
970     body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
971     title.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
972     iw.addDocument(doc);
973     
974     IndexReader ir = iw.getReader();
975     iw.close();
976     
977     IndexSearcher searcher = newSearcher(ir);
978     PostingsHighlighter highlighter = new PostingsHighlighter();
979     BooleanQuery.Builder query = new BooleanQuery.Builder();
980     query.add(new TermQuery(new Term("body", "test")), BooleanClause.Occur.SHOULD);
981     query.add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD);
982     Map<String,String[]> snippets = highlighter.highlightFields(new String[] { "title", "body" }, query.build(), searcher, new int[] { 0 }, new int[] { 1, 2 });
983     String titleHighlight = snippets.get("title")[0];
984     String bodyHighlight = snippets.get("body")[0];
985     assertEquals("This is a <b>test</b>. ", titleHighlight);
986     assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", bodyHighlight);
987     ir.close();
988     dir.close();
989   }
990   
991   public void testEncode() throws Exception {
992     Directory dir = newDirectory();
993     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
994     iwc.setMergePolicy(newLogMergePolicy());
995     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
996     
997     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
998     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
999     Field body = new Field("body", "", offsetsType);
1000     Document doc = new Document();
1001     doc.add(body);
1002     
1003     body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
1004     iw.addDocument(doc);
1005     
1006     IndexReader ir = iw.getReader();
1007     iw.close();
1008     
1009     IndexSearcher searcher = newSearcher(ir);
1010     PostingsHighlighter highlighter = new PostingsHighlighter() {
1011       @Override
1012       protected PassageFormatter getFormatter(String field) {
1013         return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
1014       }
1015     };
1016     Query query = new TermQuery(new Term("body", "highlighting"));
1017     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
1018     assertEquals(1, topDocs.totalHits);
1019     String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
1020     assertEquals(1, snippets.length);
1021     assertEquals("Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;", snippets[0]);
1022     
1023     ir.close();
1024     dir.close();
1025   }
1026   
1027   /** customizing the gap separator to force a sentence break */
1028   public void testGapSeparator() throws Exception {
1029     Directory dir = newDirectory();
1030     // use simpleanalyzer for more natural tokenization (else "test." is a token)
1031     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
1032     iwc.setMergePolicy(newLogMergePolicy());
1033     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
1034     
1035     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
1036     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
1037     Document doc = new Document();
1038     
1039     Field body1 = new Field("body", "", offsetsType);
1040     body1.setStringValue("This is a multivalued field");
1041     doc.add(body1);
1042     
1043     Field body2 = new Field("body", "", offsetsType);
1044     body2.setStringValue("This is something different");
1045     doc.add(body2);
1046     
1047     iw.addDocument(doc);
1048     
1049     IndexReader ir = iw.getReader();
1050     iw.close();
1051     
1052     IndexSearcher searcher = newSearcher(ir);
1053     PostingsHighlighter highlighter = new PostingsHighlighter() {
1054       @Override
1055       protected char getMultiValuedSeparator(String field) {
1056         assert field.equals("body");
1057         return '\u2029';
1058       }
1059     };
1060     Query query = new TermQuery(new Term("body", "field"));
1061     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
1062     assertEquals(1, topDocs.totalHits);
1063     String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
1064     assertEquals(1, snippets.length);
1065     assertEquals("This is a multivalued <b>field</b>\u2029", snippets[0]);
1066     
1067     ir.close();
1068     dir.close();
1069   }
1070 
1071   // LUCENE-4906
1072   public void testObjectFormatter() throws Exception {
1073     Directory dir = newDirectory();
1074     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
1075     iwc.setMergePolicy(newLogMergePolicy());
1076     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
1077     
1078     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
1079     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
1080     Field body = new Field("body", "", offsetsType);
1081     Document doc = new Document();
1082     doc.add(body);
1083     
1084     body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
1085     iw.addDocument(doc);
1086     
1087     IndexReader ir = iw.getReader();
1088     iw.close();
1089     
1090     IndexSearcher searcher = newSearcher(ir);
1091     PostingsHighlighter highlighter = new PostingsHighlighter() {
1092       @Override
1093       protected PassageFormatter getFormatter(String field) {
1094         return new PassageFormatter() {
1095           PassageFormatter defaultFormatter = new DefaultPassageFormatter();
1096 
1097           @Override
1098           public String[] format(Passage passages[], String content) {
1099             // Just turns the String snippet into a length 2
1100             // array of String
1101             return new String[] {"blah blah", defaultFormatter.format(passages, content).toString()};
1102           }
1103         };
1104       }
1105     };
1106 
1107     Query query = new TermQuery(new Term("body", "highlighting"));
1108     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
1109     assertEquals(1, topDocs.totalHits);
1110     int[] docIDs = new int[1];
1111     docIDs[0] = topDocs.scoreDocs[0].doc;
1112     Map<String,Object[]> snippets = highlighter.highlightFieldsAsObjects(new String[]{"body"}, query, searcher, docIDs, new int[] {1});
1113     Object[] bodySnippets = snippets.get("body");
1114     assertEquals(1, bodySnippets.length);
1115     assertTrue(Arrays.equals(new String[] {"blah blah", "Just a test <b>highlighting</b> from postings. "}, (String[]) bodySnippets[0]));
1116     
1117     ir.close();
1118     dir.close();
1119   }
1120 
1121   public void testFieldSometimesMissingFromSegment() throws Exception {
1122     Directory dir = newDirectory();
1123     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
1124     iwc.setMergePolicy(newLogMergePolicy());
1125     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
1126     
1127     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
1128     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
1129     Field body = new Field("body", "foo", offsetsType);
1130     Document doc = new Document();
1131     doc.add(body);
1132     iw.addDocument(doc);
1133 
1134     // Make a 2nd segment where body is only stored:
1135     iw.commit();
1136     doc = new Document();
1137     doc.add(new StoredField("body", "foo"));
1138     iw.addDocument(doc);
1139     
1140     IndexReader ir = DirectoryReader.open(iw.w, true);
1141     iw.close();
1142     
1143     IndexSearcher searcher = new IndexSearcher(ir);
1144     PostingsHighlighter highlighter = new PostingsHighlighter();
1145     Query query = new MatchAllDocsQuery();
1146     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
1147     assertEquals(2, topDocs.totalHits);
1148     String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
1149     assertEquals(2, snippets.length);
1150     assertEquals("foo", snippets[0]);
1151     assertNull(snippets[1]);
1152     ir.close();
1153     dir.close();
1154   }
1155 
1156   public void testCustomScoreQueryHighlight() throws Exception{
1157     Directory dir = newDirectory();
1158     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
1159     iwc.setMergePolicy(newLogMergePolicy());
1160     RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
1161     
1162     FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
1163     offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
1164     Field body = new Field("body", "", offsetsType);
1165     Document doc = new Document();
1166     doc.add(body);
1167     
1168     body.setStringValue("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy");
1169     iw.addDocument(doc);
1170     
1171     IndexReader ir = iw.getReader();
1172     iw.close();
1173 
1174     TermQuery termQuery = new TermQuery(new Term("body", "very"));
1175     PostingsHighlighter highlighter = new PostingsHighlighter();
1176     CustomScoreQuery query = new CustomScoreQuery(termQuery);
1177 
1178     IndexSearcher searcher = newSearcher(ir);
1179     TopDocs hits = searcher.search(query, 10);
1180     assertEquals(1, hits.totalHits);
1181 
1182     String snippets[] = highlighter.highlight("body", query, searcher, hits);
1183     assertEquals(1, snippets.length);
1184     assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is <b>very</b> long in the middle and finally ends with another reference to Kennedy",
1185                  snippets[0]);
1186 
1187     ir.close();
1188     dir.close();
1189   }
1190 }